import pandas as pd
import time
from datetime import timedelta

def load_csv_chunked(csv_path, chunksize=100000):
    """Read a header-less CSV in chunks and return it as a single DataFrame.

    The file is parsed ``chunksize`` rows at a time and the pieces are
    concatenated at the end. The elapsed loading time is printed to stdout.

    Args:
        csv_path: Path (or file-like object) handed straight to
            ``pd.read_csv``.
        chunksize: Number of rows parsed per chunk.

    Returns:
        A DataFrame with the columns ``user_id``, ``item_id``,
        ``category_id``, ``behavior_type`` and ``timestamp`` and a fresh
        0..n-1 index.
    """
    # perf_counter is monotonic, so the measurement is not skewed if the
    # system clock is adjusted while the file is loading.
    started = time.perf_counter()

    # Using the reader as a context manager closes the underlying file
    # handle even if parsing fails part-way through the file.
    with pd.read_csv(
        csv_path,
        names=['user_id', 'item_id', 'category_id', 'behavior_type', 'timestamp'],
        header=None,  # the first row is data, not column names
        chunksize=chunksize,
        low_memory=False,
    ) as reader:
        chunks = list(reader)

    df = pd.concat(chunks, ignore_index=True)

    elapsed = time.perf_counter() - started
    print(f"分块加载耗时: {timedelta(seconds=elapsed)}")
    return df

# Run: load the UserBehavior CSV in chunks and preview the first five rows.
csv_path = r'D:\data\公开数据集\淘宝用户行为数据\UserBehavior.csv'
df_chunked = load_csv_chunked(csv_path=csv_path)
print(df_chunked.head(n=5))